
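### Exercise 2: extracting information from a local web page
### Key difficulty: collecting multiple tags that belong to one article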

from bs4 import BeautifulSoup #导入BeautifulSoup库

path = "./web/new_index.html"  # path of the local HTML file

# Open in binary mode so BeautifulSoup detects the document's encoding itself
# instead of depending on the platform's default text encoding, which can
# fail or mis-decode pages containing non-ASCII characters.
with open(path, "rb") as web_data:
    Soup = BeautifulSoup(web_data, "lxml")  # parse the page with the lxml parser

# Each select() call returns a list of every element matching the CSS selector.
titles = Soup.select(" div.article-info > h3 > a")  # titles of all articles
images = Soup.select(" ul > li > img")  # images of all articles
contents = Soup.select("div.article-info > p.description")  # description text of all articles
scores = Soup.select(" div.rate > span")  # ratings of all articles
labels = Soup.select(" p.meta-info")  # tag paragraphs of all articles

# Walk the five result lists in lockstep and print one record per position.
# NOTE(review): zip() stops at the shortest list, and this assumes the
# selectors return elements in matching per-article order - confirm against
# the HTML.
for title, image, content, score, label in zip(
    titles, images, contents, scores, labels
):
    data = {
        "Title": title.get_text(),
        "Image": image.get("src"),
        "Content": content.get_text(),
        "Score": score.get_text(),
        # Several tags sit inside a single paragraph, so gather every
        # whitespace-stripped text fragment into a list.
        "Label": list(label.stripped_strings),
    }
    print(data)